# UNHCR
# Author: Connor Kelly
# Date: Nov 23, 2020

library(readxl)
library(dplyr)
library(tidyr)
library(ggplot2)
library(lubridate)
library(plotly)
library(janitor)
# NOTE(review): setwd() with an absolute, user-specific path ties this script
# to one machine; kept because the relative data path below resolves against it.
project_dir <- "C:/Users/Connor/Desktop/SDL"
setwd(project_dir)

# Workbook read below; the first row of the sheet is skipped.
flow_path <- "data/UNHCR/new_ref_arrival_new_asy_app_1962_2019.xlsx"
flow <- read_excel(path = flow_path, skip = 1)

##########
# Flow
##########
# Treat missing cells as zero so the sums below never become NA.
flow[is.na(flow)] <- 0

# Start by looking at origin: sum every numeric column per origin and
# population type. `.groups = "drop"` returns an ungrouped table so no stale
# grouping leaks into later steps.
flow_origin <- flow %>%
  group_by(Origin, `Population type`) %>%
  summarise(across(where(is.numeric), sum), .groups = "drop")

# Per-origin totals summed over every population type, labelled "Total".
# Columns are put in the same order as flow_origin (Origin, Population type,
# then the numeric columns) by name rather than by position, so this keeps
# working if the number of year columns changes.
flow_sum <- flow_origin %>%
  group_by(Origin) %>%
  summarise(across(where(is.numeric), sum), .groups = "drop") %>%
  mutate(`Population type` = "Total") %>%
  select(Origin, `Population type`, everything())

# Append the "Total" rows to the per-population-type rows.
flow_origin <- bind_rows(flow_origin, flow_sum)

# Priority Countries
# Reshape the per-origin totals so that each row is a year and each column is
# an origin, which lets the plots below map an origin column directly.
# Only the numeric year columns are transposed, so the values stay numeric
# (no round trip through text), and every size is derived from the data
# instead of a hard-coded column count.
year_cols <- setdiff(names(flow_sum), c("Origin", "Population type"))
origin_totals <- t(as.matrix(flow_sum[year_cols]))
colnames(origin_totals) <- flow_sum$Origin

# `year` comes first, followed by one column per origin; check.names = FALSE
# keeps origin names containing spaces or punctuation intact.
priority_origin <- data.frame(
  year = as.numeric(year_cols),
  origin_totals,
  check.names = FALSE
)

# Flows from country of origin

# Build the line chart of one origin column against year.
#   data:   data frame with a `year` column and one column per origin
#   column: name of the origin column to plot (may contain spaces)
#   title:  plot title; defaults to the column name
# Returns the ggplot object. `!!sym(column)` injects the column as a plain
# symbol, so the mapping is the same as writing the column name in aes().
plot_origin_total <- function(data, column, title = column) {
  ggplot(data, aes(year, !!sym(column))) +
    geom_line() +
    ggtitle(title) +
    ylab("Refugees and Asylum Seekers")
}

# Afghanistan
afg_p <- plot_origin_total(priority_origin, "Afghanistan")
ggplotly(afg_p)
# Burundi
bdi_p <- plot_origin_total(priority_origin, "Burundi")
ggplotly(bdi_p)
# Chad
tcd_p <- plot_origin_total(priority_origin, "Chad")
ggplotly(tcd_p)
# Iraq
irq_p <- plot_origin_total(priority_origin, "Iraq")
ggplotly(irq_p)
# Syria
syr_p <- plot_origin_total(priority_origin, "Syrian Arab Rep.", title = "Syria")
ggplotly(syr_p)
# Libya
lby_p <- plot_origin_total(priority_origin, "Libya")
ggplotly(lby_p)
# Sudan
sdn_p <- plot_origin_total(priority_origin, "Sudan")
ggplotly(sdn_p)
# South Sudan
ssd_p <- plot_origin_total(priority_origin, "South Sudan")
ggplotly(ssd_p)
# Mali
mli_p <- plot_origin_total(priority_origin, "Mali")
ggplotly(mli_p)
# Nigeria
nga_p <- plot_origin_total(priority_origin, "Nigeria")
ggplotly(nga_p)
# Somalia
som_p <- plot_origin_total(priority_origin, "Somalia")
ggplotly(som_p)
# Guatemala
gtm_p <- plot_origin_total(priority_origin, "Guatemala")
ggplotly(gtm_p)
# Venezuela
ven_p <- plot_origin_total(
  priority_origin,
  "Venezuela (Bolivarian Republic of)",
  title = "Venezuela"
)
ggplotly(ven_p)
##########

# Destinations of asylum seekers

# Flow Destination (Country of asylum)

# Origins this analysis focuses on, spelled as they are matched against the
# Origin column.
priority_countries <- c(
  "Afghanistan", "Burundi", "Chad", "Iraq", "Syrian Arab Rep.", "Libya",
  "Sudan", "South Sudan", "Mali", "Nigeria", "Somalia", "Guatemala",
  "Venezuela (Bolivarian Republic of)"
)

# Keep observations with origins in priority countries, then swap the first
# two columns. `everything()` keeps all remaining columns in place without
# hard-coding how many there are.
flow_dest <- flow %>%
  filter(Origin %in% priority_countries) %>%
  select(2, 1, everything())

# Start by looking at asylum seekers and refugees separately, later combine
# Asylum Seekers

# Long-format asylum-seeker rows for one origin.
#   data:   wide table with `Origin`, `Population type`, `Country of asylum`
#           and one column per year
#   origin: value of `Origin` to keep
# Returns a table with columns `Country of asylum` (factor), `year` (numeric)
# and `asylumseekers` (numeric). Every column other than the three named
# above is pivoted, so the result does not depend on how many year columns
# the input has.
asylum_destinations <- function(data, origin) {
  data %>%
    filter(Origin == !!origin, `Population type` == "Asylum-seekers") %>%
    select(-Origin, -`Population type`) %>%
    pivot_longer(
      cols = -`Country of asylum`, # Make wide data long for easier plotting
      names_to = "year",
      values_to = "asylumseekers"
    ) %>%
    mutate(
      `Country of asylum` = as.factor(`Country of asylum`),
      year = as.numeric(year),
      asylumseekers = as.numeric(asylumseekers)
    )
}

# Line chart of asylum seekers per year, one coloured line per country of
# asylum.
#   data:  output of asylum_destinations()
#   label: origin name to show in the plot title
# Returns the ggplot object.
plot_asylum_destinations <- function(data, label) {
  ggplot(data, aes(year, asylumseekers)) +
    geom_line(aes(colour = factor(`Country of asylum`))) +
    ggtitle(paste("Destinations of Asylum Seekers from", label)) +
    ylab("Asylum Seekers") +
    xlab("Year")
}

# Afghanistan
afg <- asylum_destinations(flow_dest, "Afghanistan")
afg_p <- plot_asylum_destinations(afg, "Afghanistan")
ggplotly(afg_p)
# Burundi
bdi <- asylum_destinations(flow_dest, "Burundi")
bdi_p <- plot_asylum_destinations(bdi, "Burundi")
ggplotly(bdi_p)
# Chad
tcd <- asylum_destinations(flow_dest, "Chad")
tcd_p <- plot_asylum_destinations(tcd, "Chad")
ggplotly(tcd_p)
# Iraq
irq <- asylum_destinations(flow_dest, "Iraq")
irq_p <- plot_asylum_destinations(irq, "Iraq")
ggplotly(irq_p)
# Syria
syr <- asylum_destinations(flow_dest, "Syrian Arab Rep.")
syr_p <- plot_asylum_destinations(syr, "Syria")
ggplotly(syr_p)
# Libya
lby <- asylum_destinations(flow_dest, "Libya")
lby_p <- plot_asylum_destinations(lby, "Libya")
ggplotly(lby_p)
# Sudan
sdn <- asylum_destinations(flow_dest, "Sudan")
sdn_p <- plot_asylum_destinations(sdn, "Sudan")
ggplotly(sdn_p)
# South Sudan
ssd <- asylum_destinations(flow_dest, "South Sudan")
ssd_p <- plot_asylum_destinations(ssd, "South Sudan")
ggplotly(ssd_p)
# Mali
mli <- asylum_destinations(flow_dest, "Mali")
mli_p <- plot_asylum_destinations(mli, "Mali")
ggplotly(mli_p)
# Nigeria
nga <- asylum_destinations(flow_dest, "Nigeria")
nga_p <- plot_asylum_destinations(nga, "Nigeria")
ggplotly(nga_p)
# Somalia
som <- asylum_destinations(flow_dest, "Somalia")
som_p <- plot_asylum_destinations(som, "Somalia")
ggplotly(som_p)
# Guatemala
gtm <- asylum_destinations(flow_dest, "Guatemala")
gtm_p <- plot_asylum_destinations(gtm, "Guatemala")
ggplotly(gtm_p)
# Venezuela
ven <- asylum_destinations(flow_dest, "Venezuela (Bolivarian Republic of)")
ven_p <- plot_asylum_destinations(ven, "Venezuela")
ggplotly(ven_p)